The purpose of this file is to work through an example of how to aggregate regional analysis results from Conveyal. The example data used here is a regional analysis that measures access to non-emergency healthcare destinations in the MPO region.
The goal here is to work out how to aggregate results so that we can answer the question: “How does access to basic healthcare services differ between EJ and non-EJ populations?”
# Raster processing
library(tidyverse)
library(sf)
library(stars)
library(mapview)
library(kableExtra)
# INPUTS ####
# read in census geog layers
mpo_census_tracts<- st_read("output/demographic_data.gpkg", layer= "tracts_acs_dec_2020") %>%
st_transform(3857)
## Reading layer `tracts_acs_dec_2020' from data source
## `C:\GitHub\existing-inequities\output\demographic_data.gpkg'
## using driver `GPKG'
## Simple feature collection with 811 features and 26 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: -71.66063 ymin: 42.01446 xmax: -70.57338 ymax: 42.73887
## Geodetic CRS: NAD83
boundary<-st_read("output/AggregationAreas.gpkg", layer= "MPO_Boundary") %>%
st_transform(3857) # needs to be psudo-mercator for raster operations
## Reading layer `MPO_Boundary' from data source
## `C:\GitHub\existing-inequities\output\AggregationAreas.gpkg'
## using driver `GPKG'
## Simple feature collection with 1 feature and 1 field
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: 186779.3 ymin: 862697.3 xmax: 275996 ymax: 943384.9
## Projected CRS: NAD83 / Massachusetts Mainland
communitytypes<- st_read("output/AggregationAreas.gpkg", layer= "CommunityTypes") %>% st_transform(3857)
## Reading layer `CommunityTypes' from data source
## `C:\GitHub\existing-inequities\output\AggregationAreas.gpkg'
## using driver `GPKG'
## Simple feature collection with 8 features and 1 field
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: 186779.3 ymin: 862697.3 xmax: 275996 ymax: 943384.9
## Projected CRS: NAD83 / Massachusetts Mainland
housingsubmarkets <- st_read("output/AggregationAreas.gpkg", layer="HousingSubmarkets") %>% st_transform(3857)
## Reading layer `HousingSubmarkets' from data source
## `C:\GitHub\existing-inequities\output\AggregationAreas.gpkg'
## using driver `GPKG'
## Simple feature collection with 7 features and 2 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: 186784.2 ymin: 861634.6 xmax: 274511.9 ymax: 943390.9
## Projected CRS: NAD83 / Massachusetts Mainland
# read in geotiff
access_layer<- read_stars("data/ConveyalRuns/HealthCareTests/HealthCare_NonEmergency_AMPeak_TransitBusRT_45min_50pct.geotiff")
destination <- "Non-emergency Healthcare"
time_period <- "AM Peak"
modes <- "Transit (Bus and Rapid Transit only)"
travel_time <- "45 minutes"
# read in destinations used for conveyal analysis
dest<- st_read("output/DestinationData.gpkg", layer= "healthcare_PT") %>%
st_transform(3857)
## Reading layer `healthcare_PT' from data source
## `C:\GitHub\existing-inequities\output\DestinationData.gpkg'
## using driver `GPKG'
## Simple feature collection with 471 features and 4 fields
## Geometry type: POINT
## Dimension: XY
## Bounding box: xmin: 194609.1 ymin: 869927.6 xmax: 269400.3 ymax: 930973.2
## Projected CRS: NAD83 / Massachusetts Mainland
# note conveyal uses lodes workers home locations as a proxy for population density.
# the workers tiff is in stored in the same grid as the conveyal access output.
# To apply demographics we would still need to go through a process similar to option 2
# read in conveyal workers geotiff
# workers <- read_stars("data/")
First, crop the raster to the boundary extent.
# CROP Access Raster to the MPO Boundary
access_layer_crop <- st_crop(access_layer, boundary)
ggplot()+
geom_stars(data=access_layer_crop, alpha = .7)+
# geom_sf(data=access_contours, aes(color = value),size=.8, show.legend = F)+
# geom_contour(data=access_layer_crop)+
geom_sf(data=boundary,size=1,color="light gray", fill= NA)+
coord_sf()+
scale_fill_viridis_c(option = "D", na.value = "transparent",
name = paste0(destination, "\n Opportunities Accessible"))+
# scale_color_viridis_c(option = "D", na.value = "transparent",
# name = paste0(destination, "\n Opportunities Accessible"))+
ggtitle(paste0("Access to ", destination, "\nwith ",
modes, "\nin ", travel_time))+
labs(caption= paste0("Time period: ", time_period))+
theme_void()
Find the average number of destinations accessible within a census geography. Then multiply average opportunities accessible from the tract by demographic population.
# Option 1: find the average number of destinations accessible within a census tract
# This process finds the average number of opportunities accessible for cells that are within a given census tract geometry. Then multiplies the number of people from a given population in the tract by the average number of opportunities accessible to them.
# http://132.72.155.230:3838/r/combining-rasters-and-vector-layers.html#extracting-to-polygons-single-band
opps_by_tract<- access_layer_crop %>%
aggregate(mpo_census_tracts, mean, na.rm=T) %>%
st_as_sf() %>%
rename(opps= 1) # rename the avg opportunities column to "opps"
# join tract level demographic data by matching tract geog used to aggregate
opps_by_tract_demo<- mpo_census_tracts %>%
st_join(opps_by_tract, join = st_equals) %>%
mutate(
people_opps = round(pop_dec*opps),
opps_per_person = ifelse(pop_dec == 0, 0, round(opps/pop_dec, 3)),
min_opps = round(minority*opps),
nonmin_opps = round(nonminority*opps),
lowinc_opps = round(lowinc*opps),
nonlowinc_opps = round(nonlowinc*opps))%>%
mutate(id= row_number())
#select(GEOID, ends_with("_opps"))
opps_by_tract_demo %>%
select(GEOID, opps, pop_dec, ends_with("_opps")) %>%
st_drop_geometry() %>%
arrange(desc(min_opps)) %>%
head(10) %>%
kbl() %>%
kable_styling()
| GEOID | opps | pop_dec | people_opps | min_opps | nonmin_opps | lowinc_opps | nonlowinc_opps |
|---|---|---|---|---|---|---|---|
| 25025070202 | 191.50000 | 5460 | 1045590 | 832657 | 212933 | 570953 | 474637 |
| 25025010405 | 150.85714 | 6853 | 1033824 | 490958 | 542866 | 771291 | 262533 |
| 25025092000 | 93.45455 | 5180 | 484095 | 444724 | 39371 | 152855 | 331240 |
| 25025080601 | 140.22222 | 4732 | 663532 | 429054 | 234477 | 427771 | 235760 |
| 25025080500 | 135.75000 | 3288 | 446346 | 422834 | 23512 | 225046 | 221300 |
| 25025080801 | 130.62500 | 4282 | 559336 | 412482 | 146855 | 368989 | 190347 |
| 25025082100 | 88.14286 | 5224 | 460458 | 404728 | 55731 | 248495 | 211964 |
| 25017359400 | 133.21429 | 6754 | 899729 | 404574 | 495156 | 171445 | 728284 |
| 25025090100 | 80.44444 | 5171 | 415978 | 399386 | 16592 | 220728 | 195250 |
| 25025170702 | 66.30769 | 7995 | 530130 | 397274 | 132856 | 255179 | 274951 |
mapview(select(opps_by_tract_demo,GEOID, NAME, ends_with("_opps")), zcol = "min_opps", layer.name = "Minority Opps")+
mapview(select(opps_by_tract_demo,GEOID, NAME, ends_with("_opps")), zcol = "nonmin_opps", layer.name = "Nonminority Opps")+
mapview(select(opps_by_tract_demo,GEOID, NAME, ends_with("_opps")), zcol = "lowinc_opps", layer.name = "Low-Income Opps")+
mapview(select(opps_by_tract_demo,GEOID, NAME, ends_with("_opps")), zcol = "nonlowinc_opps", layer.name = "Non-low-income Opps")
To summarize region-wide:
demo_summary_by_tract <- opps_by_tract_demo %>%
st_drop_geometry() %>%
ungroup() %>% # or join and group by aggregation geometry instead
summarize(people_opps = sum(people_opps, na.rm = T),
min_opps = sum(min_opps, na.rm= T),
nonmin_opps = sum(nonmin_opps, na.rm=T),
lowinc_opps = sum(lowinc_opps, na.rm=T),
nonlowinc_opps = sum(nonlowinc_opps, na.rm=T))
pct_opps_min_tract = demo_summary_by_tract$min_opps[1]/(demo_summary_by_tract$min_opps[1]+demo_summary_by_tract$nonmin_opps)
pct_opps_lowinc_tract = demo_summary_by_tract$lowinc_opps[1]/(demo_summary_by_tract$lowinc_opps[1]+demo_summary_by_tract$nonlowinc_opps)
demo_summary_by_tract %>%
kbl() %>%
kable_styling()
| people_opps | min_opps | nonmin_opps | lowinc_opps | nonlowinc_opps |
|---|---|---|---|---|
| 105630671 | 49798208 | 55828738 | 31591435 | 74035183 |
Note some people-opps not assigned to demographics, because of ACS demo gaps and maybe treatment of region edges. Overall, minority people have 47% of region-wide healthcare opportunities accessible by transit (Bus and RT). And low-income people have `30% of region-wide healthcare opportunities accessible by transit (Bus and RT).
# Option 2 allocate opportunities from each raster cell demo populations using areas overlap and
tracts <- mpo_census_tracts %>%
mutate(area_tract = unclass(st_area(geom)))
# total_pop <- sum(tracts$pop_dec, na.rm=T)
access_crop_sf <- access_layer_crop %>%
st_as_sf() %>% # convert raster to sf object (each row is a raster cell as vector)
rename(opps = 1) %>%
mutate(area_cell = unclass(st_area(geometry)),
cell_id = row_number())
#summary(access_crop_sf$opps)
# total_opps <- sum(access_crop_sf$opps)
# avg_opps <- mean(access_crop_sf$opps)
opps_by_cell_demo<- access_crop_sf %>%
st_intersection(select(tracts,
GEOID, pop_dec,
percent_min, percent_nonmin,
percent_lowinc, percent_nonlowinc,
area_tract)) %>%
mutate(area_int = unclass(st_area(geometry))) %>%
mutate(pct_overlap_tract = area_int/area_tract,
pct_overlap_cell= area_int/area_cell) %>%
mutate(
pop_part= pop_dec*pct_overlap_tract,
opps_part = opps*pct_overlap_cell,
pop_opps = pop_part*opps_part,
min_opps = pop_opps*percent_min,
nonmin_opps = pop_opps*percent_nonmin,
lowinc_opps = pop_opps*percent_lowinc,
nonlowinc_opps = pop_opps*percent_nonlowinc) %>%
# group back into whole cells
group_by(cell_id) %>%
summarise(
pop_opps = sum(pop_opps, na.rm=T),
min_opps = sum(min_opps, na.rm=T),
nonmin_opps = sum(nonmin_opps, na.rm=T),
lowinc_opps = sum(lowinc_opps, na.rm=T),
nonlowinc_opps = sum(nonlowinc_opps, na.rm=T),
GEOIDs = paste0(unique(unlist(GEOID)), collapse = ' '),
geometry= st_union(geometry)) %>%
st_as_sf()
## Warning: attribute variables are assumed to be spatially constant throughout all
## geometries
opps_by_cell_demo %>%
st_drop_geometry() %>%
arrange(desc(min_opps)) %>%
head() %>%
kbl() %>%
kable_styling()
| cell_id | pop_opps | min_opps | nonmin_opps | lowinc_opps | nonlowinc_opps | GEOIDs |
|---|---|---|---|---|---|---|
| 34056 | 286868.8 | 228443.53 | 58425.31 | 156642.10 | 130226.74 | 25025070103 25025070104 25025070202 |
| 34732 | 232862.9 | 114081.17 | 118781.72 | 133279.35 | 99583.54 | 25025010403 25025010500 25025010405 |
| 34286 | 140686.1 | 110605.31 | 30080.81 | 75328.86 | 65357.26 | 25025070103 25025070202 |
| 34285 | 147745.8 | 97868.66 | 49877.12 | 77238.36 | 70507.42 | 25025070201 25025070202 |
| 35182 | 210710.6 | 93953.55 | 116757.04 | 139024.35 | 71686.23 | 25025010404 25025010405 |
| 34055 | 139102.5 | 87598.66 | 51503.83 | 71913.44 | 67189.06 | 25025070201 25025070104 25025981700 25025070102 25025070202 |
To focus just on the cells associated with a specific tract:
opps_by_cell_focus <- opps_by_cell_demo %>%
filter(grepl( "25025071101", GEOIDs))
mapview(opps_by_cell_focus)
demo_summary_by_cell <- opps_by_cell_demo %>%
st_drop_geometry() %>%
ungroup() %>% # or join and group by aggregation geometry instead
summarize(people_opps = sum(pop_opps, na.rm = T),
min_opps = sum(min_opps, na.rm= T),
nonmin_opps = sum(nonmin_opps, na.rm=T),
lowinc_opps = sum(lowinc_opps, na.rm=T),
nonlowinc_opps = sum(nonlowinc_opps, na.rm=T))
pct_opps_min_cell = demo_summary_by_tract$min_opps[1]/(demo_summary_by_tract$min_opps[1]+demo_summary_by_tract$nonmin_opps)
pct_opps_lowinc_cell = demo_summary_by_tract$lowinc_opps[1]/(demo_summary_by_tract$lowinc_opps[1]+demo_summary_by_tract$nonlowinc_opps)
demo_summary_by_cell %>%
kbl() %>%
kable_styling()
| people_opps | min_opps | nonmin_opps | lowinc_opps | nonlowinc_opps |
|---|---|---|---|---|
| 78416142 | 36943053 | 41470286 | 23014335 | 55398710 |
Note some people-opps not assigned to demographics, because of ACS demo gaps and maybe treatment of region edges. Overall, minority people have 47% of region-wide healthcare opportunities accessible by transit (Bus and RT). And low-income people have `30% of region-wide healthcare opportunities accessible by transit (Bus and RT).
Note: Percent of opportunities is the same for the two options, but the total people opps are not.
# test <- opps_by_cell_demo %>%
# select(-cell_id,-GEOIDs) %>%
# st_as_stars()
#
# ggplot()+
# geom_stars(data=test$pop_opps, alpha = .7)+
# # geom_sf(data=access_contours, aes(color = value),size=.8, show.legend = F)+
# # geom_contour(data=access_layer_crop)+
# # geom_sf(data=boundary,size=1,color="light gray", fill= NA)+
# # coord_sf()+
# # scale_fill_viridis_c(option = "D", na.value = "transparent",
# # name = paste0(destination, "\n Opportunities Accessible"))+
# # scale_color_viridis_c(option = "D", na.value = "transparent",
# # name = paste0(destination, "\n Opportunities Accessible"))+
# ggtitle(paste0("Access to ", destination, "\nwith ",
# modes, "\nin ", travel_time))+
# # facet_wrap(~band)+
# coord_equal() +
# facet_wrap(~band) +
# theme_void() +
# scale_x_discrete(expand=c(0,0))+
# scale_y_discrete(expand=c(0,0))+
# #
# # labs(caption= paste0("Time period: ", time_period))+
# theme_void()